data <- read.csv("/home/bambrozi/2_CORTISOL/Data/T4_Elora_Data_04_25_2024.csv")
# Replace "treated" with NA
data$T4Cortisol[data$T4Cortisol == "treated" | data$T4Cortisol == "Treated at T2" | data$T4Cortisol == "treated at T2"] <- NA
# Convert the column to numeric, coercing non-numeric values to NA
data$T4Cortisol <- as.numeric(as.character(data$T4Cortisol))
#Filtering only the lines with values
data <- data[!is.na(data$T4Cortisol),]
#creating new data file cleaned
write.csv(data, "/home/bambrozi/2_CORTISOL/Data/data_clean.csv", row.names = F)
print(data)
# Summary Statistics
summary(data$T4Cortisol)
# Histogram
hist(data$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
# Boxplot
boxplot(data$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
# Density Plot
plot(density(data$T4Cortisol), main = "Density Plot of T4 Cortisol", xlab = "T4 Cortisol", ylab = "Density")
# Calculate the theoretical quantiles
qqnorm(data$T4Cortisol, main = "QQ Plot of T4Cortisol", xlim = c(min(qqnorm(data$T4Cortisol)$x), max(qqnorm(data$T4Cortisol)$x)), ylim = c(min(qqnorm(data$T4Cortisol)$y), max(qqnorm(data$T4Cortisol)$y) + 2 * IQR(qqnorm(data$T4Cortisol)$y)))
# Add the QQ line
qqline(data$T4Cortisol, col = "red")
Summary statistics
Histogram
Density
Box_Plot
qq_Plot
Shapiro-Wilk normality test
I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.
data$Categorical <- ifelse(data$T4Cortisol >= 956, "H",
ifelse(data$T4Cortisol <= 190.8, "L", "M"))
table(data$Categorical)
library(ggplot2)
# Reorder the levels of the 'Categorical' column
data$Categorical <- factor(data$Categorical, levels = c("L", "M", "H"))
# Create the histogram with reordered categories
ggplot(data, aes(x = Categorical, fill = Categorical)) +
geom_bar() +
labs(title = "Histogram of T4Cortisol by Category",
x = "Category",
y = "Frequency") +
theme_minimal()
# Create the histogram
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
geom_histogram(binwidth = 50, color = "black", alpha = 0.7) + # Adjust binwidth as needed
labs(title = "Histogram of T4Cortisol with Color by Category",
x = "T4 Cortisol",
y = "Frequency",
fill = "Category") +
scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
theme_minimal()
# Create the density plot
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
geom_density(alpha = 0.3) +
labs(title = "Density Plot of T4Cortisol with Color by Category",
x = "T4Cortisol",
y = "Density",
fill = "Category") +
scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
theme_minimal()
# Create a density plot
ggplot(data, aes(x = T4Cortisol)) +
geom_density() +
geom_vline(xintercept = c(956, 190.8), linetype = "dashed", color = "red") +
labs(title = "Density Plot of T4Cortisol with Vertical Lines",
x = "T4Cortisol",
y = "Density") +
theme_minimal()
The animals were sorted in these three categories >H = Hight >M = Medium >L = Low
The individuals frequency distribution in theese categories are shown in the plots below
Observing the previous plots I tried to remove the “outliers” phenotypes above 1250, but the outcome from Shapiro test is still indicating no normality of the data.
library(tidyverse)
data_no_out <- filter(data, T4Cortisol < 1250)
# Create QQ plot
qqnorm(data_no_out$T4Cortisol, main = "QQ Plot of T4Cortisol", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(data$T4Cortisol, col = "red")
boxplot(data_no_out$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
hist(data_no_out$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
shapiro.test(data$T4Cortisol)
Lucas Alcântara sent me the path to the genotype and pedigree files: /data/cgil/daiclu/6_Data/database/public_output/bruno
In this folder we found the following files:
I made a copy of this files in a folder called Raw_files:
/home/bambrozi/2_CORTISOL/RawFiles
This directory has two sub-directories:To perfome this I used two codes
sed -i '1i id Call' genotypes.txt
filename = 'bruno_gntps.txt'
outputFileOpen = open('genoplink.ped','w')
recode = {'0':['C','C'] , '1':['A','C'] , '2':['A','A'] , '5':['0','0'] }
for line in open(filename,'r'):
if 'Call' in line : continue
ids, call = line.strip().split()
genotypes = [ recode[geno012][0] +' '+ recode[geno012][1] for geno012 in call ]
outputFileOpen.write("%s %s %s %s %s %s %s\n" % ('HO', ids, '0','0','0','-9',' '.join(genotypes)) )
Now I have a folder called /home/bambrozi/2_CORTISOL/Geno_files with the file named genoplink.ped
library(data.table)
pheno <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
ped <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
geno <- fread("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
geno <- geno[,c("V2")]
#Bringing cdn_id to my phenotype file
#Generate a index with the match
matching_indices <- match(pheno$ID, ped$elora_id)
# Then, assign 'cdn_id' from 'ped' to 'pheno' where there are matches
pheno$cdn_id <- ifelse(!is.na(matching_indices), ped$cdn_id[matching_indices], NA)
#Making a phenotype file only with genotyped animals
pheno_genotyped <- pheno[pheno$cdn_id %in% geno$V2,]
#check if all animals in this file are genotyped
checkk <- pheno_genotyped$cdn_id %in% geno$V2
sum(checkk)
The phenotype file should have three columns: FID, Animal_id, Phenotype
HO <- rep("HO", 252)
pheno_gwas <- as.data.frame(cbind(HO, pheno_genotyped$cdn_id, pheno_genotyped$T4Cortisol))
colnames(pheno_gwas) <- c("FID", "cdn_id", "cortisol")
pheno_gwas$cdn_id <- as.numeric(pheno_gwas$cdn_id)
pheno_gwas$cortisol <- round(as.numeric(pheno_gwas$cortisol),2)
write.table(pheno_gwas, "/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", quote = F, row.names = F, col.names = T)
map <- fread("/data/cgil/daiclu/6_Data/database/public_output/bruno/DGVsnpinfo.2404.ho")
morgan <- data.frame(X0 = rep(0, 45101))
mapa=as.data.frame(cbind(map$chr, map$snp_name, morgan$X0, map$location))
head(mapa)
write.table(x = mapa, file = "/home/bambrozi/2_CORTISOL/Geno_files/genoplink.map", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
system("/home/local/bin/plink --cow --nonfounders --allow-no-sex --keep-allele-order --file /home/bambrozi/2_CORTISOL/Geno_files/genoplink --make-bed --out /home/bambrozi/2_CORTISOL/Geno_files/genoplink")
With the code above I generated the bfiles:
We ran the code below to perfom the QC
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files/genoplink
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
/home/local/bin/plink \
--bfile ${DATA} \
--cow \
--allow-no-sex \
--hwe 1e-5 \
--maf 0.01 \
--geno 0.1 \
--mind 0.1 \
--keep-allele-order \
--make-bed \
--out ${RESULT}
The server screen outcome is shown below.
After the Quality Control we ended up with
To check for duplicated individuals I performed the KINSHIP analysis using one script from Larissa Braga. Running the King Analysis on Plink.
#!/bin/bash
DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/ONLY_GRASSHILL_AND_PHENO_after_QC/only_grasshill_and_pheno_clean
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/KING/result_king
plink2 \
--bfile ${DATA} \
--chr-set 29 \
--make-king-table \
--out ${RESULT}
This is the output screen on terminal:
The table below is the output /home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king.kin0 and have pairwise comparisons between all individuals.
Now we should open in R and check for individuals with more than 0,354, to perform this we can use the code below, also provided by Larissa Braga:
setwd("/home/bambrozi/2_CORTISOL/Geno_files_after_KING")
relatedness="result_king.kin0" ## change accordingly!!
library(data.table)
print(relatedness)
rel=fread(relatedness, h = T)
head(rel)
print("Individuals with different identifications above the cut off of 0.354:")
dup=subset(rel, KINSHIP >= 0.354 & IID1!=IID2)
print(dup)
nrow(dup)
So the code above will provide this output if there is any duplicated individual.
We do not have any duplicated individual
So the file to be used are those in the directory /home/bambrozi/2_CORTISOL/Geno_files_after_QC
files:genoplink_clean
After Quality Control we didn’t lost any animal, so we don’t need to update our phenotype file
Now before performing the PCA analysis we need to change the FID for those individuals that has phenotype = 1 for Nadia.
#!/bin/bash
DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/imput_pca
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/pca_result
plink \
--bfile ${DATA} \
--keep-allele-order \
--chr-set 29 \
--pca \
--out ${RESULT}
The PCA output:
After generate the Eigenvalues and Eigenvectors I used the code below to generate the PCA Plot
setwd("/home/bambrozi/2_CORTISOL/PCA")
library(ggplot2)
library(tidyverse)
##
# Visualize PCA results
###
# read in result files
eigenValues <- read_delim("pca_result.eigenval", delim = " ", col_names = F)
eigenVectors <- read_delim("pca_result.eigenvec", delim = " ", col_names = F)
## Proportion of variation captured by each vector
eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)
# PCA plot
png("pca-plink.eng.png", width=5, height=5, units="in", res=300)
ggplot(data = eigenVectors) +
geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 1, alpha = 1) +
geom_hline(yintercept = 0, linetype="dotted") +
geom_vline(xintercept = 0, linetype="dotted") +
labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
theme_minimal()
dev.off()
# PCA plot with animal ids
png("pca-plink.eng.animals_id.png", width=50, height=50, units="in", res=300)
ggplot(data = eigenVectors) +
geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 5, alpha = 1) +
geom_text(mapping = aes(x = X3, y = X4, label = X2), size = 2, hjust = 0, vjust = 0) + # Add labels for animal IDs
geom_hline(yintercept = 0, linetype="dotted") +
geom_vline(xintercept = 0, linetype="dotted") +
labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
theme_minimal()
dev.off()
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt
/home/local/bin/gcta \
--bfile ${DATA} \
--mlma-loco \
--pheno ${PHENO} \
--autosome-num 29 \
--autosome \
--out ${RESULT}
This is te output from the code above:
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.05/nrow(gwas))
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni correction"), bty="n")
The code above creates a Manhattam Plot, the correction for multiple test is the Bonferroni correction
The code below will save information of Significant SNP.
library(tidyverse)
SNP_sig_bonf <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
Below we can see the 1 significant SNP for Bonferroni correction.
gwas$bh <- p.adjust(gwas$p, method = "BH") #FDR
The code below will create a file with the significant snp for FDR-BH
SNP_sig_BH <- filter(gwas, bh < 0.05)
write.table(SNP_sig_BH, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_BH.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
As outcome we can see that the FDR-BH method didn’t increase the number of significant SNP.
Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.05/Me)
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")
library(tidyverse)
SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
Below we can find the list of significant SNPs after the correction for Multiple Testing
I performed the qqPlot analysis using the code below:
gwas<- read.table("GWAS_result.loco.mlma", h=T)
ps<- gwas$p
inflation <- function(ps) {
chisq <- qchisq(1 - ps, 1)
lambda <- median(chisq) / qchisq(0.5, 1)
lambda
}
# Calculating the lambda - the lambda statistic should be close to 1 if
#the points fall within the expected range, or greater than one if
# the observed p-values are more significant than expected.
inflation(ps)
bonf<- -log10(0.05/nrow(gwas))
gwas$log<- -log10(gwas$p)
gg_qqplot <- function(ps, ci = 0.95) {
n <- length(ps)
df <- data.frame(
observed = -log10(sort(ps)),
expected = -log10(ppoints(n)),
clower = -log10(qbeta(p = (1 - ci) / 2, shape1 = 1:n, shape2 = n:1)),
cupper = -log10(qbeta(p = (1 + ci) / 2, shape1 = 1:n, shape2 = n:1))
)
log10Pe <- expression(paste("Expected -log"[10], plain(P)))
log10Po <- expression(paste("Observed -log"[10], plain(P)))
ggplot(df) +
geom_ribbon(
mapping = aes(x = expected, ymin = clower, ymax = cupper),
alpha = 0.1
) +
geom_point(aes(expected, observed), shape = 1, size = 3) +
geom_abline(intercept = 0, slope = 1, alpha = 0.5) +
# geom_line(aes(expected, cupper), linetype = 2, size = 0.5) +
# geom_line(aes(expected, clower), linetype = 2, size = 0.5) +
xlab(log10Pe) +
ylab(log10Po)
}
## plot -----
gg_qqplot(ps) +
theme_bw(base_size = 24) +
annotate(
geom = "text",
x = -Inf,
y = Inf,
hjust = -0.15,
vjust = 1 + 0.15 * 3,
label = sprintf("λ = %.2f", inflation(ps)),
size = 8
) +
theme(
axis.ticks = element_line(size = 0.5),
panel.grid = element_blank()
# panel.grid = element_line(size = 0.5, color = "grey80")
)
The outcome is the qqplot below, and the lambda value is \(\lambda\) = 1.03
After insert all rsID on VEP the summary is shown below:
Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:
rs42217767 ps.
this variant wasn’t recovered automaticaly by VEP, so I typed the chr
and position on the search area of the “genome viewer”
rs110031217
rs110991998
rs42089058
rs41644634
rs41567074
ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.
# GALLO
#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")
#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")
#import MARKER files = the GWAS output
gwas <- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt",
head=T, stringsAsFactors = F)
# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))
colnames(gwas) <- c("CHR","SNP", "BP")
#FINDING GENES AND QTLs ARROUND THE MARKER
#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2,
marker_file= gwas,
method = "gene",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.csv")
#FINDING QTLs
out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2,
marker_file= gwas,
method = "qtl",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/out_qtl_50k.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/out_qtl_50k_clean.csv")
Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html
Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index
The GALLO output are bellow:
For GENES
| X | CHR | SNP | BP | chr | start_pos | end_pos | width | strand | gene_id | gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 11 | BTB-01060598 | 32301594 | 11 | 32278324 | 32766620 | 488297 | - | ENSBTAG00000046199 | NRXN1 | protein_coding |
| 2 | 11 | BTB-01060598 | 32301594 | 11 | 32280416 | 32280512 | 97 | - | ENSBTAG00000044573 | U6 | snRNA |
| 3 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 45924535 | 46818511 | 893977 | - | ENSBTAG00000013117 | CACNA2D3 | protein_coding |
| 4 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 46051998 | 46062657 | 10660 | + | ENSBTAG00000013124 | LRTM1 | protein_coding |
| 5 | 22 | ARS-BFGL-NGS-23411 | 5134078 | 22 | 5091037 | 5184321 | 93285 | + | ENSBTAG00000019832 | TGFBR2 | protein_coding |
| 6 | 26 | BTB-00926636 | 17857480 | 26 | 17703169 | 17846407 | 143239 | - | ENSBTAG00000011743 | TLL2 | protein_coding |
| 7 | 26 | BTB-00926636 | 17857480 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 8 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 9 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17925643 | 18029878 | 104236 | - | ENSBTAG00000019872 | PIK3AP1 | protein_coding |
| 10 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41609015 | 41649016 | 40002 | - | ENSBTAG00000007540 | GLUD1 | protein_coding |
| 11 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41650333 | 41763371 | 113039 | + | ENSBTAG00000019194 | SHLD2 | protein_coding |
FOR QTLs
| X | CHR | SNP | BP | QTL_type | start_pos | end_pos | QTL_ID |
|---|---|---|---|---|---|---|---|
| 1 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5109498 | 5109502 | 151609 |
| 2 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5134076 | 5134080 | 151620 |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5173197 | 5173201 | 151601 |
| 4 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51802 |
| 5 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51803 |
| 6 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51804 |
| 7 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51805 |
| 8 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51806 |
| 9 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51807 |
| 10 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51808 |
| 11 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51809 |
| 12 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Health | 46052080 | 46052084 | 51810 |
| 13 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51811 |
| 14 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51812 |
| 15 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51813 |
| 16 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Milk | 46073517 | 46073521 | 243365 |
| 17 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 195264 |
| 18 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200269 |
| 19 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200270 |
| 20 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 205295 |
| 21 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 198859 |
| 22 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203868 |
| 23 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203869 |
| 24 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 210297 |
| 25 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 198858 |
| 26 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203866 |
| 27 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203867 |
| 28 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 210296 |
| 29 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 197493 |
| 30 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 202704 |
| 31 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 195685 |
| 32 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 200785 |
| 33 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 195285 |
| 34 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 200301 |
| 35 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 196236 |
| 36 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 201468 |
| 37 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 198002 |
| 38 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 203190 |
| 39 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 34750 |
| 40 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 195586 |
| 41 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 197235 |
| 42 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 202455 |
| 43 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 197783 |
| 44 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 202983 |
| 45 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 196204 |
| 46 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 201419 |
| 47 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 196867 |
| 48 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 202101 |
| 49 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 195748 |
| 50 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 200855 |
| 51 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 206213 |
| 52 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 197144 |
| 53 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 197144 |
| 54 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 202367 |
| 55 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 202367 |
| 56 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 208485 |
| 57 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 208485 |
| 58 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 197611 |
| 59 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 197611 |
| 60 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 202816 |
| 61 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 202816 |
| 62 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 209076 |
| 63 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 209076 |
| 64 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 63656 |
| 65 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 63656 |
| 66 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 199068 |
| 67 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 199068 |
| 68 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204113 |
| 69 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204113 |
| 70 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204114 |
| 71 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204114 |
| 72 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 210399 |
| 73 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 210399 |
| 74 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 63657 |
| 75 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 63657 |
| 76 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 199069 |
| 77 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 199069 |
| 78 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204115 |
| 79 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204115 |
| 80 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204116 |
| 81 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204116 |
| 82 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 210400 |
| 83 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 210400 |
| 84 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 199070 |
| 85 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 199070 |
| 86 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204117 |
| 87 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204117 |
| 88 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204118 |
| 89 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204118 |
| 90 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 210401 |
| 91 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 210401 |
| 92 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 199071 |
| 93 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 199071 |
| 94 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204119 |
| 95 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204119 |
| 96 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204120 |
| 97 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204120 |
| 98 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 210402 |
| 99 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 210402 |
| 100 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 63658 |
| 101 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 63658 |
| 102 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 199072 |
| 103 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 199072 |
| 104 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204121 |
| 105 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204121 |
| 106 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204122 |
| 107 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204122 |
| 108 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 210403 |
| 109 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 210403 |
| 110 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 63660 |
| 111 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 63660 |
| 112 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 199074 |
| 113 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 199074 |
| 114 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204125 |
| 115 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204125 |
| 116 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204126 |
| 117 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204126 |
| 118 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 210405 |
| 119 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 210405 |
| 120 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 63654 |
| 121 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 199076 |
| 122 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204129 |
| 123 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204130 |
| 124 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 210407 |
| 125 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 63655 |
| 126 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 197826 |
| 127 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203026 |
| 128 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203027 |
| 129 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 209354 |
| 130 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 195860 |
| 131 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201008 |
| 132 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201009 |
| 133 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 206462 |
| 134 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 33984 |
| 135 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195668 |
| 136 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195669 |
| 137 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195670 |
| 138 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 200758 |
| 139 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 199092 |
| 140 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 204151 |
| 141 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107048 |
| 142 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107227 |
QTL type
#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)
QTL type
#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")
Reproduction
Health
#GALLO
#QTL enrichment analysis
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "Name",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/out_enrich_qtl_genome_name.csv")
out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "QTL_type",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/out_enrich_qtl_genome_type.csv")
#Plots
#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")
#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")
QTL Enrichment outcomes
Enrichment by name (enrichment analysis will be performed for each trait individually)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval | QTL_type |
|---|---|---|---|---|---|---|---|---|
| 5 | Milk C14 index | 35 | 4437 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 9 | Milk myristoleic acid content | 26 | 3047 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 10 | Milk palmitoleic acid content | 15 | 2422 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 6 | Milk C16 index | 12 | 2002 | 108 | 163224 | 0.0000000 | 0.0000001 | Milk |
| 12 | Muscle carnosine content | 3 | 67 | 108 | 163224 | 0.0000131 | 0.0000497 | Meat and Carcass |
| 14 | Pregnancy rate | 2 | 944 | 108 | 163224 | 0.1296627 | 0.4105985 | Reproduction |
| 18 | Teat length | 1 | 300 | 108 | 163224 | 0.1802436 | 0.4892327 | Exterior |
| 15 | Rear leg placement - side view | 1 | 430 | 108 | 163224 | 0.2479752 | 0.5235032 | Exterior |
| 17 | Stillbirth | 2 | 1363 | 108 | 163224 | 0.2280294 | 0.5235032 | Reproduction |
| 3 | Foot angle | 1 | 672 | 108 | 163224 | 0.3596272 | 0.6162877 | Exterior |
| 7 | Milk capric acid content | 1 | 912 | 108 | 163224 | 0.4541067 | 0.6162877 | Milk |
| 8 | Milk myristic acid content | 1 | 902 | 108 | 163224 | 0.4504612 | 0.6162877 | Milk |
| 13 | Net merit | 1 | 903 | 108 | 163224 | 0.4508269 | 0.6162877 | Production |
| 19 | Udder depth | 1 | 695 | 108 | 163224 | 0.3693423 | 0.6162877 | Exterior |
| 16 | Somatic cell score | 1 | 1122 | 108 | 163224 | 0.5253603 | 0.6654564 | Health |
| 2 | Conception rate | 1 | 1255 | 108 | 163224 | 0.5656375 | 0.6716945 | Reproduction |
| 1 | Calving ease | 2 | 3819 | 108 | 163224 | 0.7219243 | 0.7776744 | Reproduction |
| 4 | Length of productive life | 1 | 2004 | 108 | 163224 | 0.7367441 | 0.7776744 | Production |
| 11 | Milk yield | 1 | 6432 | 108 | 163224 | 0.9870080 | 0.9870080 | Milk |
Enrichment by QTL_type (enrichment processes performed for the QTL classes)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval |
|---|---|---|---|---|---|---|---|
| 1 | Exterior | 4 | 9077 | 108 | 163224 | 0.8569775 | 0.9999953 |
| 2 | Health | 1 | 5889 | 108 | 163224 | 0.9811250 | 0.9999953 |
| 3 | Meat and Carcass | 3 | 18258 | 108 | 163224 | 0.9997109 | 0.9999953 |
| 4 | Milk | 91 | 75352 | 108 | 163224 | 0.0000000 | 0.0000000 |
| 5 | Production | 2 | 19640 | 108 | 163224 | 0.9999848 | 0.9999953 |
| 6 | Reproduction | 7 | 35008 | 108 | 163224 | 0.9999953 | 0.9999953 |
online version: https://biit.cs.ut.ee/gprofiler/gost
I got a script from Julia Rodrigues about the R package GPROFILER to perform an enrichment of my Genes.
### enriquecimento genico
#install.packages("gprofiler2")
library(gprofiler2)
#Para conferir a lista de organism -> https://biit.cs.ut.ee/gprofiler/page/organism-list
#Obs: eu entro com os ids ENSOAR...
query <- read.table ("/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.txt", header = T)
query <- query[,c("gene_id")]
gene_enrich <- gost(
query,
organism = "btaurus",
ordered_query = FALSE,
multi_query = FALSE,
significant = TRUE,
exclude_iea = FALSE,
measure_underrepresentation = FALSE,
evcodes = TRUE,
user_threshold = 0.05,
correction_method = c("fdr"),
domain_scope = c("annotated"),
numeric_ns = "",
sources = NULL,
as_short_link = FALSE,
highlight = FALSE
)
#str(gene_enrich) # para ver o formato dos meus dados
#selecionando apenas as informações da lista que me interessam para fazer meu data.frame
result_enrich <- data.frame(gene_enrich$result)
result_enrich <- data.frame(Category = result_enrich$source,
ID = result_enrich$term_id,
Term = result_enrich$term_name,
adj_pvalue = result_enrich$p_value,
id_ensembl = result_enrich$intersection)
write.table(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/gene_enrich.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=F)
write.csv(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/gene_enrich.csv")
Below we can se the significant terms of this enrichment (output):
| X | Category | ID | Term | adj_pvalue | id_ensembl |
|---|---|---|---|---|---|
| 1 | GO:BP | GO:0006538 | glutamate catabolic process | 0.0383792 | ENSBTAG00000007540 |
| 2 | GO:BP | GO:0003274 | endocardial cushion fusion | 0.0383792 | ENSBTAG00000019832 |
| 3 | GO:BP | GO:1905005 | regulation of epithelial to mesenchymal transition involved in endocardial cushion formation | 0.0383792 | ENSBTAG00000019832 |
| 4 | GO:BP | GO:1905007 | positive regulation of epithelial to mesenchymal transition involved in endocardial cushion formation | 0.0383792 | ENSBTAG00000019832 |
| 5 | GO:BP | GO:0003430 | growth plate cartilage chondrocyte growth | 0.0383792 | ENSBTAG00000019832 |
| 6 | GO:BP | GO:0003431 | growth plate cartilage chondrocyte development | 0.0383792 | ENSBTAG00000019832 |
| 7 | GO:BP | GO:0003433 | chondrocyte development involved in endochondral bone morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 8 | GO:BP | GO:0003415 | chondrocyte hypertrophy | 0.0383792 | ENSBTAG00000019832 |
| 9 | GO:BP | GO:0048631 | regulation of skeletal muscle tissue growth | 0.0383792 | ENSBTAG00000011743 |
| 10 | GO:BP | GO:0051138 | positive regulation of NK T cell differentiation | 0.0383792 | ENSBTAG00000019832 |
| 11 | GO:BP | GO:0062043 | positive regulation of cardiac epithelial to mesenchymal transition | 0.0383792 | ENSBTAG00000019832 |
| 12 | GO:BP | GO:0051136 | regulation of NK T cell differentiation | 0.0383792 | ENSBTAG00000019832 |
| 13 | GO:BP | GO:0003186 | tricuspid valve morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 14 | GO:BP | GO:0062042 | regulation of cardiac epithelial to mesenchymal transition | 0.0383792 | ENSBTAG00000019832 |
| 15 | GO:BP | GO:0060434 | bronchus morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 16 | GO:BP | GO:0060440 | trachea formation | 0.0383792 | ENSBTAG00000019832 |
| 17 | GO:BP | GO:0060462 | lung lobe development | 0.0383792 | ENSBTAG00000019832 |
| 18 | GO:BP | GO:0060463 | lung lobe morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 19 | GO:BP | GO:0048632 | negative regulation of skeletal muscle tissue growth | 0.0383792 | ENSBTAG00000011743 |
| 20 | GO:BP | GO:0003175 | tricuspid valve development | 0.0383792 | ENSBTAG00000019832 |
| 21 | GO:BP | GO:0061520 | Langerhans cell differentiation | 0.0383792 | ENSBTAG00000019832 |
| 22 | GO:BP | GO:1905317 | inferior endocardial cushion morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 23 | GO:BP | GO:2000563 | positive regulation of CD4-positive, alpha-beta T cell proliferation | 0.0383792 | ENSBTAG00000019832 |
| 24 | GO:BP | GO:1990428 | miRNA transport | 0.0383792 | ENSBTAG00000019832 |
| 25 | GO:BP | GO:0002513 | tolerance induction to self antigen | 0.0383792 | ENSBTAG00000019832 |
| 26 | GO:BP | GO:0002514 | B cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 27 | GO:BP | GO:0003149 | membranous septum morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 28 | GO:BP | GO:1990086 | lens fiber cell apoptotic process | 0.0383792 | ENSBTAG00000019832 |
| 29 | GO:BP | GO:0002520 | immune system development | 0.0383792 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 30 | GO:BP | GO:0002666 | positive regulation of T cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 31 | GO:BP | GO:0002651 | positive regulation of tolerance induction to self antigen | 0.0383792 | ENSBTAG00000019832 |
| 32 | GO:BP | GO:0061343 | cell adhesion involved in heart morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 33 | GO:BP | GO:0002649 | regulation of tolerance induction to self antigen | 0.0383792 | ENSBTAG00000019832 |
| 34 | GO:BP | GO:0002661 | regulation of B cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 35 | GO:BP | GO:0002664 | regulation of T cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 36 | GO:BP | GO:0002663 | positive regulation of B cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 37 | GO:BP | GO:0002684 | positive regulation of immune system process | 0.0428818 | ENSBTAG00000019832,ENSBTAG00000019872,ENSBTAG00000019194 |
| 38 | GO:BP | GO:0048630 | skeletal muscle tissue growth | 0.0431888 | ENSBTAG00000011743 |
| 39 | GO:BP | GO:0051251 | positive regulation of lymphocyte activation | 0.0431888 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 40 | GO:BP | GO:0034154 | toll-like receptor 7 signaling pathway | 0.0431888 | ENSBTAG00000019872 |
| 41 | GO:BP | GO:0060439 | trachea morphogenesis | 0.0431888 | ENSBTAG00000019832 |
| 42 | GO:BP | GO:0002517 | T cell tolerance induction | 0.0431888 | ENSBTAG00000019832 |
| 43 | GO:BP | GO:0002645 | positive regulation of tolerance induction | 0.0431888 | ENSBTAG00000019832 |
| 44 | GO:BP | GO:0060433 | bronchus development | 0.0460331 | ENSBTAG00000019832 |
| 45 | GO:BP | GO:0003418 | growth plate cartilage chondrocyte differentiation | 0.0460331 | ENSBTAG00000019832 |
| 46 | GO:BP | GO:0002696 | positive regulation of leukocyte activation | 0.0482941 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 47 | GO:BP | GO:0001865 | NK T cell differentiation | 0.0489635 | ENSBTAG00000019832 |
| 48 | GO:BP | GO:0050867 | positive regulation of cell activation | 0.0496277 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 49 | GO:BP | GO:0003198 | epithelial to mesenchymal transition involved in endocardial cushion formation | 0.0496277 | ENSBTAG00000019832 |
| 50 | GO:BP | GO:0002643 | regulation of tolerance induction | 0.0496277 | ENSBTAG00000019832 |
| 51 | GO:BP | GO:2001034 | positive regulation of double-strand break repair via nonhomologous end joining | 0.0496277 | ENSBTAG00000019194 |
| 52 | GO:MF | GO:0004352 | glutamate dehydrogenase (NAD+) activity | 0.0098128 | ENSBTAG00000007540 |
| 53 | GO:MF | GO:0004353 | glutamate dehydrogenase [NAD(P)+] activity | 0.0098128 | ENSBTAG00000007540 |
| 54 | GO:MF | GO:0016639 | oxidoreductase activity, acting on the CH-NH2 group of donors, NAD or NADP as acceptor | 0.0098128 | ENSBTAG00000007540 |
| 55 | GO:MF | GO:0005026 | transforming growth factor beta receptor activity, type II | 0.0147171 | ENSBTAG00000019832 |
| 56 | GO:MF | GO:0048185 | activin binding | 0.0326677 | ENSBTAG00000019832 |
| 57 | GO:MF | GO:0036312 | phosphatidylinositol 3-kinase regulatory subunit binding | 0.0326677 | ENSBTAG00000019872 |
| 58 | GO:MF | GO:0034713 | type I transforming growth factor beta receptor binding | 0.0326677 | ENSBTAG00000019832 |
| 59 | GO:MF | GO:0017002 | activin receptor activity | 0.0326677 | ENSBTAG00000019832 |
| 60 | GO:MF | GO:0005024 | transforming growth factor beta receptor activity | 0.0326677 | ENSBTAG00000019832 |
| 61 | GO:MF | GO:0050431 | transforming growth factor beta binding | 0.0461234 | ENSBTAG00000019832 |
| 62 | GO:MF | GO:0043548 | phosphatidylinositol 3-kinase binding | 0.0461234 | ENSBTAG00000019872 |
| 63 | GO:MF | GO:0016638 | oxidoreductase activity, acting on the CH-NH2 group of donors | 0.0461234 | ENSBTAG00000007540 |
| 64 | GO:MF | GO:0005160 | transforming growth factor beta receptor binding | 0.0461234 | ENSBTAG00000019832 |
| 65 | GO:MF | GO:0004675 | transmembrane receptor protein serine/threonine kinase activity | 0.0461234 | ENSBTAG00000019832 |
| 66 | REAC | REAC:R-BTA-2243919 | Crosslinking of collagen fibrils | 0.0440084 | ENSBTAG00000011743 |
| 67 | REAC | REAC:R-BTA-2173788 | Downregulation of TGF-beta receptor signaling | 0.0440084 | ENSBTAG00000019832 |
| 68 | REAC | REAC:R-BTA-8964539 | Glutamate and glutamine metabolism | 0.0440084 | ENSBTAG00000007540 |
| 69 | REAC | REAC:R-BTA-112308 | Presynaptic depolarization and calcium channel opening | 0.0440084 | ENSBTAG00000013117 |
| 70 | REAC | REAC:R-BTA-2151201 | Transcriptional activation of mitochondrial biogenesis | 0.0440084 | ENSBTAG00000007540 |
| 71 | REAC | REAC:R-BTA-2173791 | TGF-beta receptor signaling in EMT (epithelial to mesenchymal transition) | 0.0440084 | ENSBTAG00000019832 |
From the online version of GPROFILER i got the following results.
Legend
Additionaly I perfomed a another version of this analysis but with no eletronic GO annotation
Now I’m gonna make a pie chart with the categories
library(ggplot2)
# Assuming sig_enrich$Category contains the categories
category_counts <- table(result_enrich$Category)
# Create a pie chart
pie_chart <- ggplot(data = NULL, aes(x = factor(1), fill = names(category_counts), y = as.numeric(category_counts))) +
geom_bar(stat = "identity", width = 1) +
coord_polar(theta = "y") +
labs(title = "Distribution among terms",
x = NULL,
y = NULL,
fill = "Category") +
theme_void() +
theme(legend.position = "right")
# Print the pie chart
print(pie_chart)
First I need to generate the .ped file
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
plink \
--bfile ${DATA} \
--cow \
--recode \
--out ${RESULT}
Second, is necessary to convert the .ped file to Linkage format:
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/hapblock_in
plink \
--file ${DATA} \
--cow \
--recode HV \
--out ${RESULT}
The code above will generate a pair of files .ped and .info for each chromosome
I ran the haploview only for chromosome 11, 22, 26, 28 that are when the significant SNP are located.
As the LD Plot bring the SNP ordered, first I looked at /home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt, to know the SNP position, and then compare with the Check Markers tab in the haploview SNP position x SNP# in plot
| Chr | SNP | bp | A1 | A2 | Freq | Haploview.order |
|---|---|---|---|---|---|---|
| 11 | BTB-01060598 | 32301594 | C | A | 0.4503970 | 680 |
| 22 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.1865080 | 88 |
| 22 | ARS-BFGL-NGS-26419 | 46052082 | A | C | 0.0813492 | 762 |
| 26 | BTB-00926636 | 17857480 | C | A | 0.0257937 | 296 |
| 26 | BTA-62125-no-rs | 17899619 | C | A | 0.0257937 | 297 |
| 28 | BTA-99379-no-rs | 41666186 | A | C | 0.0496032 | 701 |
The two significant SNPs from chromosome 26 fell within a Haploblock.
Chr 11 (SNP 680)
Chr 22 (SNP 88)
Chr 22 (SNP 762)
Chr 26 (SNP 296 and 297)
Chr 28 (SNP 701)
As we found an haploblock on chromosome 26 I decided take a look inside this block. So, on Haploview I checked the name and position of the first SNP of this block (292) and the last (300).
| X. | Name | Position | ObsHET | PredHET | HWpval | X.Geno | FamTrio | MendErr | MAF | Alleles |
|---|---|---|---|---|---|---|---|---|---|---|
| 292 | ARS-BFGL-NGS-111577 | 17714604 | 0.401 | 0.448 | 0.1161 | 100 | 0 | 0 | 0.339 | A:C |
| 293 | ARS-BFGL-NGS-21408 | 17746468 | 0.369 | 0.403 | 0.2260 | 100 | 0 | 0 | 0.280 | C:A |
| 294 | ARS-BFGL-NGS-16328 | 17778908 | 0.056 | 0.054 | 1.0000 | 100 | 0 | 0 | 0.028 | A:C |
| 295 | ARS-BFGL-NGS-117063 | 17817225 | 0.413 | 0.452 | 0.2022 | 100 | 0 | 0 | 0.345 | C:A |
| 296 | BTB-00926636 | 17857480 | 0.052 | 0.050 | 1.0000 | 100 | 0 | 0 | 0.026 | A:C |
| 297 | BTA-62125-no-rs | 17899619 | 0.052 | 0.050 | 1.0000 | 100 | 0 | 0 | 0.026 | A:C |
| 298 | ARS-BFGL-NGS-110679 | 17953795 | 0.397 | 0.433 | 0.2211 | 100 | 0 | 0 | 0.317 | A:C |
| 299 | ARS-BFGL-NGS-72889 | 17989430 | 0.052 | 0.050 | 1.0000 | 100 | 0 | 0 | 0.026 | C:A |
| 300 | BTB-00926868 | 18040673 | 0.385 | 0.420 | 0.2319 | 100 | 0 | 0 | 0.300 | A:C |
Then I checked on NCBI’s Genome Data Viewer which genes are inside this block, adding 50k before the first SNP and 50K after the last SNP.
This interval has 426,069bp and has this genes inside:
As you can see in the image below:
The genes TLL2, TM9SF3 and PIK3AP1 already were in the gene list from GALLO, but the genes DNTT and OPALIN were new!
I performed a GPROFILER analysis only for these genes spotted in this Block 10.
After check the outcome for 5% of significance for SNPs, Prof. Flávio asked me to carry out again considering the interval between 5% and 15% of significance.
Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf05<- -log10(0.05/Me)
bonf15<- -log10(0.15/Me)
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf05), col="black")
abline(h=(bonf15), col="red2")
legend("topleft", inset=c(0.05, 0.05), col="red2", lwd=2,
legend=c("15% sig. Bonferroni corr. for ind. segments"), bty="n")
legend("topleft", inset=c(0.05, 0.10), col="black", lwd=2,
legend=c("5% sig. Bonferroni corr. for ind. segments"), bty="n")
library(tidyverse)
#Recovering the significant SNPs > <5%
SNP_sig_bonf_ind_seg_5 <- filter(gwas, logP > bonf05)
write.csv(SNP_sig_bonf_ind_seg_5, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_5.csv")
#Recovering the significant SNPs > 15% and <5%
SNP_sig_bonf_ind_seg_5_15 <- filter(gwas, logP > bonf15 & logP < bonf05)
write.csv(SNP_sig_bonf_ind_seg_5_15, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_5_15.csv")
#Recovering the significant SNPs > 15%
SNP_sig_bonf_ind_seg_15 <- filter(gwas, logP > bonf15)
write.csv(SNP_sig_bonf_ind_seg_15, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv")
Below we can find the list of significant SNPs after the correction for Multiple Testing
| Chr | X | X15. | X5. | X5.15. | logP |
|---|---|---|---|---|---|
| 28 | BTA-99379-no-rs | X | X | 6.610970 | |
| 22 | ARS-BFGL-NGS-23411 | X | X | 5.485541 | |
| 22 | ARS-BFGL-NGS-26419 | X | X | 5.272074 | |
| 26 | BTA-62125-no-rs | X | X | 4.732155 | |
| 26 | BTB-00926636 | X | X | 4.732155 | |
| 11 | BTB-01060598 | X | X | 4.682532 | |
| 20 | BTB-01471673 | X | X | 4.182974 | |
| 23 | Hapmap54016-rs29022819 | X | X | 4.016453 | |
| 28 | ARS-BFGL-NGS-11935 | X | X | 3.986177 | |
| 28 | Hapmap43005-BTA-64606 | X | X | 3.986177 | |
| 11 | BTA-122098-no-rs | X | X | 3.980028 | |
| 19 | ARS-BFGL-NGS-29119 | X | X | 3.925461 | |
| 14 | Hapmap39823-BTA-35254 | X | X | 3.905277 | |
| 19 | ARS-BFGL-NGS-28901 | X | X | 3.888442 | |
| 3 | ARS-BFGL-NGS-4585 | X | X | 3.885943 | |
| 14 | UA-IFASA-7664 | X | X | 3.882503 |
To make easier to check the name of the significant SNPs I made the following Manhattan Plot
# Prepare the dataset
don <- gwas %>%
group_by(Chr) %>%
summarise(chr_len = max(bp)) %>%
mutate(tot = cumsum(as.numeric(chr_len)) - as.numeric(chr_len)) %>%
select(-chr_len) %>%
left_join(gwas, ., by = c("Chr" = "Chr")) %>%
arrange(Chr, bp) %>%
mutate(BPcum = as.numeric(bp) + tot) %>%
mutate(is_highlight = ifelse(logP > bonf15, "yes", "no")) %>%
mutate(is_annotate = ifelse(logP > bonf15, "yes", "no"))
# Remove rows with missing BPcum values
don <- don %>% filter(!is.na(BPcum))
# Prepare X axis
axisdf <- don %>%
group_by(Chr) %>%
summarize(center = (max(BPcum) + min(BPcum)) / 2)
# Find the maximum logP value for setting y-axis limits
max_logP <- max(don$logP, na.rm = TRUE)
# Make the plot
ggplot(don, aes(x = BPcum, y = logP)) +
geom_point(aes(color = as.factor(Chr)), alpha = 0.8, size = 1.3) +
scale_color_manual(values = rep(c("grey", "skyblue"), 22)) +
scale_x_continuous(label = axisdf$Chr, breaks = axisdf$center) +
scale_y_continuous(expand = c(0, 0), limits = c(0, max_logP + 1)) + # Set y-axis limit
geom_hline(yintercept = bonf05, color = "black") + # Add horizontal line for bonf05
geom_hline(yintercept = bonf15, color = "red2") + # Add horizontal line for bonf15
geom_point(data = subset(don, is_highlight == "yes"), color = "orange", size = 2) +
geom_text_repel(data = subset(don, is_annotate == "yes"), aes(label = SNP), size = 2) +
annotate("text", x = Inf, y = Inf, label = "15% sig. Bonferroni corr. for ind. segments",
color = "red2", hjust = 1.1, vjust = 2, size = 3) +
annotate("text", x = Inf, y = Inf, label = "5% sig. Bonferroni corr. for ind. segments",
color = "black", hjust = 1.1, vjust = 3.5, size = 3) +
theme_bw() +
theme(
legend.position = "none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
labs(title = "GWAS Cortisol", x = "Chromosomes", y = "-log10(p)")
Generating a file with the GWAS output and rsID
library(tidyverse)
# Extracting SNP names to use in the SNPChimp
SNP_NAMES <- SNP_sig_bonf_ind_seg_15 %>% pull(SNP)
SNP_NAMES <- paste(SNP_NAMES, collapse = ", ")
print(SNP_NAMES)
# Copy this printed names and paste on SNPChimp
# Opening the SNPChimp output in R
rsID <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/SNPchimp_result_ind_seg_15.tsv")
rsID <- rsID[,c("rs","SNP_name")]
#Merging in one file the GWAS output and rsID
SNP_rsID_sig_bonf_ind_seg_15 <- merge(SNP_sig_bonf_ind_seg_15, rsID, by.x = "SNP", by.y = "SNP_name")
SNP_rsID_sig_bonf_ind_seg_15 <- SNP_rsID_sig_bonf_ind_seg_15[, c("rs", "SNP", "Chr", "bp", "A1", "A2", "Freq", "b", "se", "p", "logP")]
write.csv(SNP_rsID_sig_bonf_ind_seg_15, "/home/bambrozi/2_CORTISOL/GWAS/SNP_rsID_bonf_ind_seg_15.csv")
Bellow we can see the table with the SNP names and rsID for all significant SNPs with p-value lower than 0,15.
| X | rs | SNP | Chr | bp | A1 | A2 | Freq | b | se | p | logP |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | rs109578343 | ARS-BFGL-NGS-11935 | 28 | 44942879 | A | C | 0.0297619 | 412.261 | 106.1750 | 0.0001032 | 3.986177 |
| 2 | rs110031217 | ARS-BFGL-NGS-23411 | 22 | 5134078 | A | C | 0.1865080 | 207.116 | 44.5110 | 0.0000033 | 5.485541 |
| 3 | rs110991998 | ARS-BFGL-NGS-26419 | 22 | 46052082 | A | C | 0.0813492 | 308.357 | 67.7592 | 0.0000053 | 5.272074 |
| 4 | rs109751680 | ARS-BFGL-NGS-28901 | 19 | 42338148 | C | A | 0.3710320 | 141.156 | 36.8765 | 0.0001293 | 3.888442 |
| 5 | rs41577586 | ARS-BFGL-NGS-29119 | 19 | 49744592 | A | C | 0.4047620 | -142.084 | 36.9169 | 0.0001187 | 3.925461 |
| 6 | rs109888380 | ARS-BFGL-NGS-4585 | 3 | 107160292 | A | C | 0.3353170 | -144.488 | 37.7607 | 0.0001300 | 3.885943 |
| 7 | rs41624254 | BTA-122098-no-rs | 11 | 31882807 | A | C | 0.4444440 | 131.779 | 33.9688 | 0.0001047 | 3.980028 |
| 8 | rs41644634 | BTA-62125-no-rs | 26 | 17899619 | C | A | 0.0257937 | 487.149 | 113.7690 | 0.0000185 | 4.732155 |
| 9 | rs41567074 | BTA-99379-no-rs | 28 | 41666186 | A | C | 0.0496032 | 435.809 | 84.4338 | 0.0000002 | 6.610970 |
| 10 | rs42089058 | BTB-00926636 | 26 | 17857480 | C | A | 0.0257937 | 487.149 | 113.7690 | 0.0000185 | 4.732155 |
| 11 | rs42217767 | BTB-01060598 | 11 | 32301594 | C | A | 0.4503970 | -152.746 | 35.8860 | 0.0000208 | 4.682532 |
| 12 | rs42590312 | BTB-01471673 | 20 | 11545685 | A | C | 0.2460320 | 169.707 | 42.5157 | 0.0000656 | 4.182974 |
| 13 | rs41632193 | Hapmap39823-BTA-35254 | 14 | 64061208 | A | C | 0.2916670 | -138.029 | 35.9698 | 0.0001244 | 3.905277 |
| 14 | rs41648479 | Hapmap43005-BTA-64606 | 28 | 45805105 | A | C | 0.0297619 | 412.261 | 106.1750 | 0.0001032 | 3.986177 |
| 15 | rs29022819 | Hapmap54016-rs29022819 | 23 | 33609998 | C | A | 0.1904760 | 173.228 | 44.4201 | 0.0000963 | 4.016453 |
| 16 | rs41632223 | UA-IFASA-7664 | 14 | 64299714 | C | A | 0.2678570 | -141.443 | 36.9840 | 0.0001311 | 3.882503 |
After insert all rsID on VEP the summary is shown below:
As can be seen, the VEP only analysed 14 variants
Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:
rs41648479
rs41632223
rs41632193
rs42590312
rs42089058
rs41567074
rs41644634
rs41624254
rs109888380
rs41577586
rs109751680
rs110991998
rs110031217
rs109578343
ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.
# GALLO
#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")
#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")
#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv")
# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))
colnames(gwas) <- c("CHR","SNP", "BP")
#FINDING GENES AND QTLs ARROUND THE MARKER
#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2,
marker_file= gwas,
method = "gene",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_genes_50k_15.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_genes_50k_15.csv")
#FINDING QTLs
out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2,
marker_file= gwas,
method = "qtl",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_qtl_50k_15.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_qtl_50k_clean_15.csv")
Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html
Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index
The GALLO output are bellow:
For GENES
| X | CHR | SNP | BP | chr | start_pos | end_pos | width | strand | gene_id | gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 3 | ARS-BFGL-NGS-4585 | 107160292 | 3 | 107109148 | 107110689 | 1542 | + | ENSBTAG00000030338 | GJA9 | protein_coding |
| 2 | 3 | ARS-BFGL-NGS-4585 | 107160292 | 3 | 107111822 | 107117831 | 6010 | + | ENSBTAG00000030337 | MYCBP | protein_coding |
| 3 | 3 | ARS-BFGL-NGS-4585 | 107160292 | 3 | 107121920 | 107136338 | 14419 | + | ENSBTAG00000009368 | RRAGC | protein_coding |
| 4 | 11 | BTB-01060598 | 32301594 | 11 | 32278324 | 32766620 | 488297 | - | ENSBTAG00000046199 | NRXN1 | protein_coding |
| 5 | 11 | BTB-01060598 | 32301594 | 11 | 32280416 | 32280512 | 97 | - | ENSBTAG00000044573 | U6 | snRNA |
| 6 | 14 | Hapmap39823-BTA-35254 | 64061208 | 14 | 64047857 | 64103385 | 55529 | + | ENSBTAG00000017833 | RNF19A | protein_coding |
| 7 | 14 | UA-IFASA-7664 | 64299714 | 14 | 64289853 | 64398324 | 108472 | + | ENSBTAG00000019793 | RGS22 | protein_coding |
| 8 | 19 | ARS-BFGL-NGS-29119 | 49744592 | 19 | 49662140 | 49766195 | 104056 | + | ENSBTAG00000052072 | B3GNTL1 | protein_coding |
| 9 | 19 | ARS-BFGL-NGS-29119 | 49744592 | 19 | 49768681 | 49927209 | 158529 | - | ENSBTAG00000015414 | TBCD | protein_coding |
| 10 | 19 | ARS-BFGL-NGS-28901 | 42338148 | 19 | 42283857 | 42299780 | 15924 | - | ENSBTAG00000001195 | KCNH4 | protein_coding |
| 11 | 19 | ARS-BFGL-NGS-28901 | 42338148 | 19 | 42302320 | 42303571 | 1252 | - | ENSBTAG00000000665 | HCRT | protein_coding |
| 12 | 19 | ARS-BFGL-NGS-28901 | 42338148 | 19 | 42307188 | 42312293 | 5106 | - | ENSBTAG00000000666 | GHDC | protein_coding |
| 13 | 19 | ARS-BFGL-NGS-28901 | 42338148 | 19 | 42319170 | 42357910 | 38741 | - | ENSBTAG00000010125 | STAT5B | protein_coding |
| 14 | 19 | ARS-BFGL-NGS-28901 | 42338148 | 19 | 42387164 | 42390885 | 3722 | + | ENSBTAG00000052763 | NA | lncRNA |
| 15 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 45924535 | 46818511 | 893977 | - | ENSBTAG00000013117 | CACNA2D3 | protein_coding |
| 16 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 46051998 | 46062657 | 10660 | + | ENSBTAG00000013124 | LRTM1 | protein_coding |
| 17 | 22 | ARS-BFGL-NGS-23411 | 5134078 | 22 | 5091037 | 5184321 | 93285 | + | ENSBTAG00000019832 | TGFBR2 | protein_coding |
| 18 | 23 | Hapmap54016-rs29022819 | 33609998 | 23 | 33618285 | 33618377 | 93 | + | ENSBTAG00000045279 | SNORA70 | snoRNA |
| 19 | 26 | BTB-00926636 | 17857480 | 26 | 17703169 | 17846407 | 143239 | - | ENSBTAG00000011743 | TLL2 | protein_coding |
| 20 | 26 | BTB-00926636 | 17857480 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 21 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 22 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17925643 | 18029878 | 104236 | - | ENSBTAG00000019872 | PIK3AP1 | protein_coding |
| 23 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41609015 | 41649016 | 40002 | - | ENSBTAG00000007540 | GLUD1 | protein_coding |
| 24 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41650333 | 41763371 | 113039 | + | ENSBTAG00000019194 | SHLD2 | protein_coding |
| 25 | 28 | Hapmap43005-BTA-64606 | 45805105 | 28 | 45757443 | 45768564 | 11122 | + | ENSBTAG00000012393 | AGT | protein_coding |
| 26 | 28 | Hapmap43005-BTA-64606 | 45805105 | 28 | 45772443 | 45814853 | 42411 | - | ENSBTAG00000020314 | COG2 | protein_coding |
FOR QTLs
| X | CHR | SNP | BP | QTL_type | start_pos | end_pos | QTL_ID |
|---|---|---|---|---|---|---|---|
| 1 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Reproduction | 107191525 | 107191529 | 40198 |
| 2 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Exterior | 107191525 | 107191529 | 40199 |
| 3 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Reproduction | 107191525 | 107191529 | 40200 |
| 4 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Exterior | 107191525 | 107191529 | 40201 |
| 5 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Milk | 107191525 | 107191529 | 40202 |
| 6 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Milk | 107191525 | 107191529 | 40203 |
| 7 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Production | 107191525 | 107191529 | 40204 |
| 8 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Production | 107191525 | 107191529 | 40205 |
| 9 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Milk | 107191525 | 107191529 | 40206 |
| 10 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Milk | 107191525 | 107191529 | 40207 |
| 11 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Exterior | 107191525 | 107191529 | 40208 |
| 12 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Reproduction | 107191525 | 107191529 | 40209 |
| 13 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Health | 107191525 | 107191529 | 40210 |
| 14 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Reproduction | 107191525 | 107191529 | 40211 |
| 15 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Exterior | 107191525 | 107191529 | 40212 |
| 16 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Exterior | 107191525 | 107191529 | 40213 |
| 17 | 11 | BTA-122098-no-rs | 31882807 | Milk | 31870044 | 31870048 | 122275 |
| 18 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013785 | 64013789 | 247433 |
| 19 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013858 | 64013862 | 241580 |
| 20 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013858 | 64013862 | 252031 |
| 21 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013897 | 64013901 | 241079 |
| 22 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013897 | 64013901 | 251434 |
| 23 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013952 | 64013956 | 241853 |
| 24 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013952 | 64013956 | 252403 |
| 25 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64015726 | 64015730 | 105628 |
| 26 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64017424 | 64017428 | 252151 |
| 27 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64017970 | 64017974 | 18873 |
| 28 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 18872 |
| 29 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 105335 |
| 30 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 173792 |
| 31 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 175101 |
| 32 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 176088 |
| 33 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 176487 |
| 34 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 252723 |
| 35 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64063422 | 64063426 | 175102 |
| 36 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64068329 | 64068333 | 105336 |
| 37 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64081490 | 64081494 | 104637 |
| 38 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64081490 | 64081494 | 104890 |
| 39 | 14 | Hapmap39823-BTA-35254 | 64061208 | Health | 64081490 | 64081494 | 179830 |
| 40 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64087234 | 64087238 | 175103 |
| 41 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64087234 | 64087238 | 252869 |
| 42 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64087450 | 64087454 | 252628 |
| 43 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64088569 | 64088573 | 252926 |
| 44 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64103583 | 64103587 | 104638 |
| 45 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64103583 | 64103587 | 104891 |
| 46 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64272869 | 64272873 | 102476 |
| 47 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64272869 | 64272873 | 104484 |
| 48 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64272869 | 64272873 | 104863 |
| 49 | 14 | UA-IFASA-7664 | 64299714 | Health | 64272869 | 64272873 | 179764 |
| 50 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64283263 | 64283267 | 175108 |
| 51 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64283263 | 64283267 | 251800 |
| 52 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64283947 | 64283951 | 251398 |
| 53 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64288968 | 64288972 | 247633 |
| 54 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64288968 | 64288972 | 252142 |
| 55 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 18875 |
| 56 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 105660 |
| 57 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 158137 |
| 58 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 173793 |
| 59 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 175109 |
| 60 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 176085 |
| 61 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 176486 |
| 62 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 250009 |
| 63 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 252724 |
| 64 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64302757 | 64302761 | 251934 |
| 65 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64308608 | 64308612 | 252948 |
| 66 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64317001 | 64317005 | 252652 |
| 67 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64322496 | 64322500 | 251752 |
| 68 | 14 | UA-IFASA-7664 | 64299714 | Meat_and_Carcass | 64327478 | 64327482 | 20269 |
| 69 | 14 | UA-IFASA-7664 | 64299714 | Meat_and_Carcass | 64327478 | 64327482 | 20311 |
| 70 | 14 | UA-IFASA-7664 | 64299714 | Meat_and_Carcass | 64327478 | 64327482 | 20393 |
| 71 | 14 | UA-IFASA-7664 | 64299714 | Meat_and_Carcass | 64327478 | 64327482 | 20464 |
| 72 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64327904 | 64327908 | 32596 |
| 73 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64327904 | 64327908 | 32634 |
| 74 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64328322 | 64328326 | 252206 |
| 75 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64339118 | 64339122 | 251497 |
| 76 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64343288 | 64343292 | 252287 |
| 77 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42292941 | 42292945 | 147657 |
| 78 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42292941 | 42292945 | 147675 |
| 79 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173073 |
| 80 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173074 |
| 81 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173075 |
| 82 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173076 |
| 83 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173077 |
| 84 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173237 |
| 85 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42338146 | 42338150 | 166545 |
| 86 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42338146 | 42338150 | 166546 |
| 87 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42338146 | 42338150 | 253605 |
| 88 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42349650 | 42349654 | 147630 |
| 89 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49702578 | 49702582 | 195323 |
| 90 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49702578 | 49702582 | 195324 |
| 91 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49702578 | 49702582 | 195325 |
| 92 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49702578 | 49702582 | 205408 |
| 93 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Meat_and_Carcass | 49715846 | 49715850 | 107754 |
| 94 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49735877 | 49735881 | 196362 |
| 95 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49772086 | 49772090 | 195342 |
| 96 | 20 | BTB-01471673 | 11545685 | Production | 11498161 | 11498165 | 65554 |
| 97 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5109498 | 5109502 | 151609 |
| 98 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5134076 | 5134080 | 151620 |
| 99 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5173197 | 5173201 | 151601 |
| 100 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51802 |
| 101 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51803 |
| 102 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51804 |
| 103 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51805 |
| 104 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51806 |
| 105 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51807 |
| 106 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51808 |
| 107 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51809 |
| 108 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Health | 46052080 | 46052084 | 51810 |
| 109 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51811 |
| 110 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51812 |
| 111 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51813 |
| 112 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Milk | 46073517 | 46073521 | 243365 |
| 113 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 195264 |
| 114 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200269 |
| 115 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200270 |
| 116 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 205295 |
| 117 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 198859 |
| 118 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203868 |
| 119 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203869 |
| 120 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 210297 |
| 121 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 198858 |
| 122 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203866 |
| 123 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203867 |
| 124 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 210296 |
| 125 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 197493 |
| 126 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 202704 |
| 127 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 195685 |
| 128 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 200785 |
| 129 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 195285 |
| 130 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 200301 |
| 131 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 196236 |
| 132 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 201468 |
| 133 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 198002 |
| 134 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 203190 |
| 135 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 34750 |
| 136 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 195586 |
| 137 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 197235 |
| 138 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 202455 |
| 139 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 197783 |
| 140 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 202983 |
| 141 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 196204 |
| 142 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 201419 |
| 143 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 196867 |
| 144 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 202101 |
| 145 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 195748 |
| 146 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 200855 |
| 147 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 206213 |
| 148 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 197144 |
| 149 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 197144 |
| 150 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 202367 |
| 151 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 202367 |
| 152 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 208485 |
| 153 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 208485 |
| 154 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 197611 |
| 155 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 197611 |
| 156 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 202816 |
| 157 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 202816 |
| 158 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 209076 |
| 159 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 209076 |
| 160 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 63656 |
| 161 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 63656 |
| 162 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 199068 |
| 163 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 199068 |
| 164 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204113 |
| 165 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204113 |
| 166 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204114 |
| 167 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204114 |
| 168 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 210399 |
| 169 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 210399 |
| 170 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 63657 |
| 171 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 63657 |
| 172 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 199069 |
| 173 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 199069 |
| 174 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204115 |
| 175 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204115 |
| 176 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204116 |
| 177 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204116 |
| 178 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 210400 |
| 179 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 210400 |
| 180 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 199070 |
| 181 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 199070 |
| 182 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204117 |
| 183 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204117 |
| 184 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204118 |
| 185 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204118 |
| 186 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 210401 |
| 187 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 210401 |
| 188 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 199071 |
| 189 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 199071 |
| 190 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204119 |
| 191 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204119 |
| 192 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204120 |
| 193 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204120 |
| 194 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 210402 |
| 195 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 210402 |
| 196 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 63658 |
| 197 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 63658 |
| 198 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 199072 |
| 199 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 199072 |
| 200 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204121 |
| 201 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204121 |
| 202 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204122 |
| 203 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204122 |
| 204 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 210403 |
| 205 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 210403 |
| 206 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 63660 |
| 207 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 63660 |
| 208 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 199074 |
| 209 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 199074 |
| 210 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204125 |
| 211 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204125 |
| 212 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204126 |
| 213 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204126 |
| 214 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 210405 |
| 215 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 210405 |
| 216 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 63654 |
| 217 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 199076 |
| 218 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204129 |
| 219 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204130 |
| 220 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 210407 |
| 221 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 63655 |
| 222 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 197826 |
| 223 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203026 |
| 224 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203027 |
| 225 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 209354 |
| 226 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 195860 |
| 227 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201008 |
| 228 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201009 |
| 229 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 206462 |
| 230 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 33984 |
| 231 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195668 |
| 232 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195669 |
| 233 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195670 |
| 234 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 200758 |
| 235 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 199092 |
| 236 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 204151 |
| 237 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107048 |
| 238 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107227 |
| 239 | 28 | ARS-BFGL-NGS-11935 | 44942879 | Reproduction | 44971508 | 44971512 | 238444 |
| 240 | 28 | ARS-BFGL-NGS-11935 | 44942879 | Production | 44981682 | 44981686 | 66703 |
| 241 | 28 | ARS-BFGL-NGS-11935 | 44942879 | Production | 44981682 | 44981686 | 66704 |
| 242 | 28 | ARS-BFGL-NGS-11935 | 44942879 | Production | 44981682 | 44981686 | 69429 |
| 243 | 28 | Hapmap43005-BTA-64606 | 45805105 | Milk | 45811156 | 45811160 | 34467 |
QTL type
#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)
QTL type
#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")
Reproduction
Health
#QTL enrichment analysis
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "Name",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_enrich_qtl_genome_name_15.csv")
out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "QTL_type",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_enrich_qtl_genome_type_15.csv")
#Plots
#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")
#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")
QTL Enrichment outcomes
Enrichment by name (enrichment analysis will be performed for each trait individually)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval | QTL_type |
|---|---|---|---|---|---|---|---|---|
| 15 | Milk C14 index | 35 | 4437 | 209 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 28 | Milk myristoleic acid content | 26 | 3047 | 209 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 31 | Milk protein percentage | 39 | 8803 | 209 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 29 | Milk palmitoleic acid content | 15 | 2422 | 209 | 163224 | 0.0000007 | 0.0000079 | Milk |
| 16 | Milk C16 index | 12 | 2002 | 209 | 163224 | 0.0000129 | 0.0001159 | Milk |
| 36 | Muscle carnosine content | 3 | 67 | 209 | 163224 | 0.0000933 | 0.0006998 | Meat and Carcass |
| 23 | Milk lactose content | 3 | 105 | 209 | 163224 | 0.0003523 | 0.0022648 | Milk |
| 20 | Milk fat content | 2 | 92 | 209 | 163224 | 0.0063319 | 0.0356168 | Milk |
| 27 | Milk myristic acid content | 4 | 902 | 209 | 163224 | 0.0294859 | 0.1449683 | Milk |
| 33 | Milk stearic acid content | 2 | 218 | 209 | 163224 | 0.0322152 | 0.1449683 | Milk |
| 30 | Milk pentadecylic acid content | 2 | 280 | 209 | 163224 | 0.0505491 | 0.2067917 | Milk |
| 44 | Teat length | 2 | 300 | 209 | 163224 | 0.0570974 | 0.2141153 | Exterior |
| 14 | Milk arachidic acid content | 1 | 60 | 209 | 163224 | 0.0740084 | 0.2220251 | Milk |
| 25 | Milk margaric acid content | 1 | 59 | 209 | 163224 | 0.0728207 | 0.2220251 | Milk |
| 26 | Milk mid-infrared spectra | 1 | 55 | 209 | 163224 | 0.0680550 | 0.2220251 | Milk |
| 8 | Ketosis | 2 | 441 | 209 | 163224 | 0.1101333 | 0.2915293 | Health |
| 40 | Rear leg placement - side view | 2 | 430 | 209 | 163224 | 0.1056354 | 0.2915293 | Exterior |
| 39 | Pregnancy rate | 3 | 944 | 209 | 163224 | 0.1218012 | 0.3045029 | Reproduction |
| 37 | Myristic acid content | 1 | 120 | 209 | 163224 | 0.1425637 | 0.3376510 | Meat and Carcass |
| 6 | Fat thickness at the 12th rib | 1 | 169 | 209 | 163224 | 0.1947851 | 0.4382665 | Meat and Carcass |
| 7 | Foot angle | 2 | 672 | 209 | 163224 | 0.2129475 | 0.4563160 | Exterior |
| 13 | Maturity rate | 1 | 243 | 209 | 163224 | 0.2677107 | 0.5072331 | Production |
| 17 | Milk C18 index | 1 | 246 | 209 | 163224 | 0.2705243 | 0.5072331 | Milk |
| 42 | Stillbirth | 3 | 1363 | 209 | 163224 | 0.2544359 | 0.5072331 | Reproduction |
| 18 | Milk capric acid content | 2 | 912 | 209 | 163224 | 0.3259016 | 0.5640605 | Milk |
| 38 | Net merit | 2 | 903 | 209 | 163224 | 0.3216964 | 0.5640605 | Production |
| 24 | Milk lauric acid content | 1 | 366 | 209 | 163224 | 0.3746664 | 0.6021425 | Milk |
| 32 | Milk protein yield | 5 | 3093 | 209 | 163224 | 0.3633449 | 0.6021425 | Milk |
| 41 | Somatic cell score | 2 | 1122 | 209 | 163224 | 0.4213659 | 0.6538437 | Health |
| 5 | Dairy form | 1 | 514 | 209 | 163224 | 0.4829472 | 0.7244208 | Exterior |
| 3 | Calving ease | 5 | 3819 | 209 | 163224 | 0.5419946 | 0.7725995 | Reproduction |
| 9 | Lean meat yield | 1 | 621 | 209 | 163224 | 0.5494041 | 0.7725995 | Meat and Carcass |
| 43 | Strength | 1 | 664 | 209 | 163224 | 0.5736508 | 0.7813075 | Exterior |
| 45 | Udder depth | 1 | 695 | 209 | 163224 | 0.5903212 | 0.7813075 | Exterior |
| 19 | Milk caprylic acid content | 1 | 811 | 209 | 163224 | 0.6471450 | 0.8320436 | Milk |
| 10 | Length of productive life | 2 | 2004 | 209 | 163224 | 0.7280866 | 0.8855108 | Production |
| 35 | Milking speed | 1 | 1011 | 209 | 163224 | 0.7273020 | 0.8855108 | Milk |
| 2 | Body weight gain | 1 | 1354 | 209 | 163224 | 0.8248432 | 0.9440581 | Production |
| 4 | Conception rate | 1 | 1255 | 209 | 163224 | 0.8009513 | 0.9440581 | Reproduction |
| 11 | Longissimus muscle area | 1 | 1420 | 209 | 163224 | 0.8391628 | 0.9440581 | Meat and Carcass |
| 12 | Marbling score | 1 | 1817 | 209 | 163224 | 0.9037804 | 0.9830306 | Meat and Carcass |
| 34 | Milk yield | 5 | 6432 | 209 | 163224 | 0.9174952 | 0.9830306 | Milk |
| 1 | Body weight | 2 | 4289 | 209 | 163224 | 0.9746418 | 0.9967928 | Production |
| 21 | Milk fat percentage | 8 | 10941 | 209 | 163224 | 0.9726277 | 0.9967928 | Milk |
| 22 | Milk fat yield | 1 | 8220 | 209 | 163224 | 0.9999797 | 0.9999797 | Milk |
Enrichment by QTL_type (enrichment processes performed for the QTL classes)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval |
|---|---|---|---|---|---|---|---|
| 1 | Exterior | 9 | 9077 | 209 | 163224 | 0.8264587 | 1 |
| 2 | Health | 4 | 5889 | 209 | 163224 | 0.9456284 | 1 |
| 3 | Meat and Carcass | 8 | 18258 | 209 | 163224 | 0.9999640 | 1 |
| 4 | Milk | 168 | 75352 | 209 | 163224 | 0.0000000 | 0 |
| 5 | Production | 8 | 19640 | 209 | 163224 | 0.9999916 | 1 |
| 6 | Reproduction | 12 | 35008 | 209 | 163224 | 1.0000000 | 1 |
online version: https://biit.cs.ut.ee/gprofiler/gost
I got a script from Julia Rodrigues about the R package GPROFILER to perform an enrichment of my Genes.
### enriquecimento genico
#install.packages("gprofiler2")
library(gprofiler2)
#Para conferir a lista de organism -> https://biit.cs.ut.ee/gprofiler/page/organism-list
#Obs: eu entro com os ids ENSOAR...
query <- read.table ("/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_genes_50k_15.txt", header = T)
query <- query[,c("gene_id")]
gene_enrich <- gost(
query,
organism = "btaurus",
ordered_query = FALSE,
multi_query = FALSE,
significant = TRUE,
exclude_iea = FALSE,
measure_underrepresentation = FALSE,
evcodes = TRUE,
user_threshold = 0.05,
correction_method = c("fdr"),
domain_scope = c("annotated"),
numeric_ns = "",
sources = NULL,
as_short_link = FALSE,
highlight = FALSE
)
#str(gene_enrich) # para ver o formato dos meus dados
#selecionando apenas as informações da lista que me interessam para fazer meu data.frame
result_enrich <- data.frame(gene_enrich$result)
result_enrich <- data.frame(Category = result_enrich$source,
ID = result_enrich$term_id,
Term = result_enrich$term_name,
adj_pvalue = result_enrich$p_value,
id_ensembl = result_enrich$intersection)
write.table(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/Bonf_5_15/gene_enrich_15.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=F)
write.csv(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/Bonf_5_15/gene_enrich_15.csv")
Below we can se the significant terms of this enrichment (output):
| X | Category | ID | Term | adj_pvalue | id_ensembl |
|---|---|---|---|---|---|
| 1 | GO:MF | GO:0004352 | glutamate dehydrogenase (NAD+) activity | 0.0290548 | ENSBTAG00000007540 |
| 2 | GO:MF | GO:0004353 | glutamate dehydrogenase [NAD(P)+] activity | 0.0290548 | ENSBTAG00000007540 |
| 3 | GO:MF | GO:0016639 | oxidoreductase activity, acting on the CH-NH2 group of donors, NAD or NADP as acceptor | 0.0290548 | ENSBTAG00000007540 |
| 4 | GO:MF | GO:0031702 | type 1 angiotensin receptor binding | 0.0290548 | ENSBTAG00000012393 |
| 5 | GO:MF | GO:0031703 | type 2 angiotensin receptor binding | 0.0290548 | ENSBTAG00000012393 |
| 6 | GO:MF | GO:0005026 | transforming growth factor beta receptor activity, type II | 0.0414893 | ENSBTAG00000019832 |
| 7 | GO:MF | GO:0102193 | protein-ribulosamine 3-kinase activity | 0.0414893 | ENSBTAG00000015414 |
From the online version of GPROFILER i got the following results.
Legend
The GO CONTEXT provided by GPROFILER can be seen in the image below:
There was no necessity to make this plot.
First I need to generate the .ped file
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
plink \
--bfile ${DATA} \
--cow \
--recode \
--out ${RESULT}
Second, is necessary to convert the .ped file to Linkage format:
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/hapblock_in
plink \
--file ${DATA} \
--cow \
--recode HV \
--out ${RESULT}
The code above will generate a pair of files .ped and .info for each chromosome
I ran the haploview only for chromosome 03, 11, 14, 19, 20, 22, 23, 26 and 28 that are when the significant SNP are located.
As the LD Plot bring the SNP ordered, first I looked at /home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv, to know the SNP position, and then compare with the Check Markers tab in the haploview SNP position x SNP# in plot
| VEP | rs | SNP | Chr | bp | A1 | A2 | Freq | Haploview.Order | Block |
|---|---|---|---|---|---|---|---|---|---|
| yes | rs109888380 | ARS-BFGL-NGS-4585 | 3 | 107160292 | A | C | 0.3353170 | 1818 | |
| yes | rs41624254 | BTA-122098-no-rs | 11 | 31882807 | A | C | 0.4444440 | 674 | |
| no | rs42217767 | BTB-01060598 | 11 | 32301594 | C | A | 0.4503970 | 680 | |
| yes | rs41632193 | Hapmap39823-BTA-35254 | 14 | 64061208 | A | C | 0.2916670 | 1196 | |
| yes | rs41632223 | UA-IFASA-7664 | 14 | 64299714 | C | A | 0.2678570 | 1200 | |
| yes | rs109751680 | ARS-BFGL-NGS-28901 | 19 | 42338148 | C | A | 0.3710320 | 713 | |
| yes | rs41577586 | ARS-BFGL-NGS-29119 | 19 | 49744592 | A | C | 0.4047620 | 849 | Block 38 |
| yes | rs42590312 | BTB-01471673 | 20 | 11545685 | A | C | 0.2460320 | 232 | |
| yes | rs110031217 | ARS-BFGL-NGS-23411 | 22 | 5134078 | A | C | 0.1865080 | 88 | |
| yes | rs110991998 | ARS-BFGL-NGS-26419 | 22 | 46052082 | A | C | 0.0813492 | 762 | |
| no | rs29022819 | Hapmap54016-rs29022819 | 23 | 33609998 | C | A | 0.1904760 | 534 | |
| yes | rs41644634 | BTA-62125-no-rs | 26 | 17899619 | C | A | 0.0257937 | 297 | Block 10 |
| yes | rs42089058 | BTB-00926636 | 26 | 17857480 | C | A | 0.0257937 | 296 | Block 10 |
| yes | rs109578343 | ARS-BFGL-NGS-11935 | 28 | 44942879 | A | C | 0.0297619 | 773 | |
| yes | rs41567074 | BTA-99379-no-rs | 28 | 41666186 | A | C | 0.0496032 | 701 | |
| yes | rs41648479 | Hapmap43005-BTA-64606 | 28 | 45805105 | A | C | 0.0297619 | 791 | Block 21 |
As can be observed in the column “Block”,four significant SNPs fell within a Haploblock.
rs41577586 (ARS-BFGL-NGS-29119) - chr 19 - order 849 - Block 38 rs41644634 (BTA-62125-no-rs) - chr 26 - order 297 - Block 10 rs42089058 (BTB-00926636) - chr 26 - order 296 - Block 10 rs41648479 (Hapmap43005-BTA-64606) - chr 28 - order 791 - Block 21
Chr 3 (SNP 1818)
Chr 11 (SNP 674 and
680)
Chr 14 (SNP 1196 and
1200)
Chr 19 (SNP 713)
Chr 19 (SNP 849)
Chr 20 (SNP 232)
Chr 22 (SNP 88)
Chr 22 (SNP 762)
Chr 23 (SNP 534)
Chr 26 (SNP 296 and
297)
Chr 28 (SNP 701)
Chr 28 (SNP 773 and
791)
To assess the correlation between Cortisol phenotypes and Genomic Estimated Breeding Values (GEBVs), we opt for a linear regression instead of a standard correlation test. This decision is driven by the non-normal distribution of our Cortisol phenotypes, which violates the assumptions required for traditional correlation tests.
Linear regression offers a robust alternative as it does not necessitate normality for the dependent variable. By regressing GEBVs over Cortisol, we can model the relationship between these variables. Our aim is to estimate the regression coefficient, which serves as our correlation estimate.
Due to the violation of normality assumptions for the dependent variable (Cortisol), traditional correlation tests may not provide reliable results, particularly in assessing the significance of the correlation. Therefore, alternative approaches, such as linear regression, are preferred as they do not require the same assumptions about the distribution of the dependent variable. By using linear regression, we can still assess the relationship between Cortisol and GEBVs while accommodating the non-normality of Cortisol phenotypes.
The regression model can be represented as follows: \[ y = \beta_0 + \beta_1 \times GEBV_{\text{Milk}} + \epsilon \]
Where:
This approach enables us to quantify the relationship between Cortisol and GEBVs, addressing the non-normality of Cortisol phenotypes while allowing for formal hypothesis testing of the correlation’s significance.
The first data I received from Lucas had only 135 animals out of 260 with values the other 125 had only NA I shown this to Lucas Lucas wrote to Alisson Lucas sent me the missing animals I merged this two files
rm(list = ls())
# Load the necessary library
library(dplyr)
library(tidyverse)
cortisol_260 <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
#This is the first dataframe with information for 135 animals and 125 NA
GEBVs1 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora.csv")
#This is the second file with information for the 125 NA animals
GEBVs2 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/elora_missing_females_2404_06_11_2024.csv")
#This are de columns we can use because we know the meaning of the acronyms
GEBVs_to_use <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebv_names_lucas_06102024_BAG.csv")
sum(is.na(GEBVs1$MILK))
GEBVs1<- GEBVs1[which(is.na(GEBVs1[,"DHI_BARN_NAME"]) == F),]
sum(!is.na(GEBVs2$MILK))
GEBVs2<- GEBVs2[which(is.na(GEBVs2[,"DHI_BARN_NAME"]) == F),]
print(GEBVs1$DHI_BARN_NAME)
print(GEBVs2$DHI_BARN_NAME)
# Making the two dataframes with the same columns
# Remove elora_id and international_id from GEBVs1
GEBVs1 <- GEBVs1 %>% select(-elora_id, -international_id)
# Remove ANIMAL_ID from GEBVs2
GEBVs2 <- GEBVs2 %>% select(-ANIMAL_ID)
# Check if the two dataframes have the same columns
have_same_columns <- all(names(GEBVs1) == names(GEBVs2))
if (have_same_columns) {
print("The dataframes have the same columns.")
} else {
print("The dataframes do not have the same columns.")
}
# Check if the column names are in the same order
same_order <- identical(names(GEBVs1), names(GEBVs2))
if (same_order) {
print("The columns are in the same order.")
} else {
print("The columns are not in the same order.")
}
GEBVs_combined <- rbind(GEBVs1, GEBVs2)
# Sort the columns
sorted_cortisol_260 <- sort(cortisol_260$ID)
sorted_GEBVs_combined <- sort(GEBVs_combined$DHI_BARN_NAME)
# Check if the sorted columns have the same values
identical(sorted_cortisol_260, sorted_GEBVs_combined)
# Create a duplicate of the column 'DHI_BARN_NAME' and name it 'elora_id'
GEBVs_combined$elora_id <- GEBVs_combined$DHI_BARN_NAME
# Assuming GEBVs_combined is your data frame
GEBVs_combined <- GEBVs_combined %>%
select(elora_id, DHI_BARN_NAME, everything())
write.csv(GEBVs_combined, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora_complete.csv")
# Merging the dataframe with Cortisol values, with the dataframe with GEBVs values
Merg_Cort_GEBVs <- merge(cortisol_260, GEBVs_combined, by.x = "ID", by.y = "elora_id")
write.csv(Merg_Cort_GEBVs, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")
#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")
data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, all_of(Columns_to_use))
write.csv(data, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits.txt")
ps. I double checked by hand the select and merge process against the original tables received and is everything ok.
# Fit the linear regression model
model <- lm(T4Cortisol ~ MILK, data = data)
# Summarize the model to get the regression coefficients and statistical summary
model_summary <- summary(model)
# Extract the desired statistics
multiple_r_squared <- model_summary$r.squared
adjusted_r_squared <- model_summary$adj.r.squared
f_statistic <- model_summary$fstatistic[1]
p_value <- pf(model_summary$fstatistic[1], model_summary$fstatistic[2], model_summary$fstatistic[3], lower.tail = FALSE)
# Combine the statistics into a data frame
results <- data.frame(
Multiple_R_Squared = multiple_r_squared,
Adjusted_R_Squared = adjusted_r_squared,
F_Statistic = f_statistic,
P_Value = p_value
)
# Save the results to a CSV file
write.csv(results, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_milk.csv", row.names = FALSE)
# Plot the linear regression between T4Cortisol and MILK
ggplot(data, aes(x = MILK, y = T4Cortisol)) +
geom_point() + # Add points
geom_smooth(method = "lm", se = FALSE, color = "blue") + # Add linear regression line
labs(title = "Linear Regression of T4Cortisol on MILK",
x = "MILK",
y = "T4Cortisol") +
theme_minimal()
Plot linear regression
Cortisol vs. Milk GEBV
Summary statistics
| Multiple_R_Squared | Adjusted_R_Squared | F_Statistic | P_Value |
|---|---|---|---|
| 5.2e-05 | -0.0038237 | 0.0134257 | 0.9078463 |
As we have 54 GEBVs to fit a linear regression we designed a loop:
# Initialize a list to store the results
results_list <- list()
# Loop through columns 3 to 56 for the GEBVs
for (i in 3:ncol(data)) {
trait_name <- colnames(data)[i]
# Fit the linear regression model
model <- lm(data[[2]] ~ data[[i]], data = data)
# Summarize the model
model_summary <- summary(model)
# Extract the desired statistics
multiple_r_squared <- model_summary$r.squared
adjusted_r_squared <- model_summary$adj.r.squared
f_statistic <- model_summary$fstatistic[1] # F-statistic value
f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
# Combine the statistics into a data frame
result <- data.frame(
Trait = trait_name,
Multiple_R_Squared = multiple_r_squared,
Adjusted_R_Squared = adjusted_r_squared,
F_Statistic = f_statistic,
P_Value = p_value
)
# Append the result to the results list
results_list[[i - 2]] <- result
}
# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)
# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_all_traits.csv", row.names = FALSE)
Summary statistics for all Traits’ GEBVs
| Trait | Multiple_R_Squared | Adjusted_R_Squared | F_Statistic | P_Value |
|---|---|---|---|---|
| BMR | 0.0400473 | 0.0363266 | 10.7632421 | 0.0011781 |
| CO | 0.0280949 | 0.0243278 | 7.4580136 | 0.0067510 |
| MSL | 0.0198445 | 0.0160455 | 5.2235464 | 0.0230946 |
| CTFS | 0.0185227 | 0.0147186 | 4.8690531 | 0.0282237 |
| UT | 0.0166425 | 0.0128310 | 4.3664218 | 0.0376334 |
| MS | 0.0128559 | 0.0090298 | 3.3600230 | 0.0679494 |
| CK | 0.0106156 | 0.0067808 | 2.7682148 | 0.0973674 |
| IH | 0.0101222 | 0.0062855 | 2.6382359 | 0.1055404 |
| DD | 0.0100161 | 0.0061790 | 2.6103042 | 0.1073935 |
| CONF | 0.0095452 | 0.0057062 | 2.4863924 | 0.1160600 |
| UD | 0.0085295 | 0.0046866 | 2.2195470 | 0.1374945 |
| SU | 0.0077590 | 0.0039131 | 2.0174678 | 0.1567056 |
| MT | 0.0076274 | 0.0037810 | 1.9829901 | 0.1602796 |
| LP | 0.0074242 | 0.0035770 | 1.9297608 | 0.1659824 |
| MET | 0.0070642 | 0.0032156 | 1.8355367 | 0.1766601 |
| MR | 0.0066755 | 0.0028254 | 1.7338637 | 0.1890867 |
| DO | 0.0060791 | 0.0022267 | 1.5779921 | 0.2101864 |
| DHL | 0.0052895 | 0.0014340 | 1.3719419 | 0.2425591 |
| MOB | 0.0047368 | 0.0008792 | 1.2279220 | 0.2688435 |
| DF | 0.0046402 | 0.0007822 | 1.2027541 | 0.2737945 |
| FAT | 0.0046204 | 0.0007623 | 1.1975980 | 0.2748229 |
| SH | 0.0043235 | 0.0004643 | 1.1203076 | 0.2908422 |
| DS | 0.0043121 | 0.0004528 | 1.1173348 | 0.2914818 |
| FOOT | 0.0043005 | 0.0004412 | 1.1143288 | 0.2921304 |
| FA | 0.0042762 | 0.0004168 | 1.1079850 | 0.2935052 |
| PROT | 0.0040026 | 0.0001421 | 1.0368190 | 0.3095163 |
| WL | 0.0038262 | -0.0000350 | 0.9909414 | 0.3204451 |
| MDR | 0.0038096 | -0.0000516 | 0.9866366 | 0.3214965 |
| CA | 0.0036379 | -0.0002239 | 0.9420177 | 0.3326685 |
| ME | 0.0034176 | -0.0004452 | 0.8847561 | 0.3477821 |
| SCS | 0.0031318 | -0.0007321 | 0.8105371 | 0.3688011 |
| FE | 0.0028874 | -0.0009774 | 0.7470985 | 0.3881993 |
| FTP | 0.0027874 | -0.0010778 | 0.7211475 | 0.3965550 |
| AFS | 0.0025212 | -0.0013450 | 0.6521188 | 0.4201001 |
| UF | 0.0018686 | -0.0020001 | 0.4830008 | 0.4876916 |
| ST | 0.0018050 | -0.0020640 | 0.4665276 | 0.4952019 |
| MSP | 0.0014890 | -0.0023812 | 0.3847251 | 0.5356327 |
| HL | 0.0013205 | -0.0025503 | 0.3411423 | 0.5596809 |
| SCK | 0.0011411 | -0.0027305 | 0.2947310 | 0.5876733 |
| TL | 0.0009008 | -0.0029717 | 0.2326179 | 0.6299983 |
| LOC | 0.0008542 | -0.0030185 | 0.2205721 | 0.6390011 |
| DA | 0.0006826 | -0.0031907 | 0.1762328 | 0.6749803 |
| CW | 0.0005859 | -0.0032878 | 0.1512479 | 0.6976664 |
| BCS | 0.0003594 | -0.0035151 | 0.0927672 | 0.7609338 |
| RUM | 0.0003149 | -0.0035598 | 0.0812707 | 0.7758114 |
| TU | 0.0002204 | -0.0036547 | 0.0568793 | 0.8116875 |
| FL | 0.0001793 | -0.0036960 | 0.0462618 | 0.8298705 |
| BQ | 0.0001697 | -0.0037056 | 0.0437957 | 0.8343993 |
| HHE | 0.0000521 | -0.0038236 | 0.0134478 | 0.9077707 |
| MILK | 0.0000520 | -0.0038237 | 0.0134257 | 0.9078463 |
| HH | 0.0000509 | -0.0038249 | 0.0131340 | 0.9088482 |
| BD | 0.0000293 | -0.0038466 | 0.0075535 | 0.9308097 |
| DCA | 0.0000046 | -0.0038713 | 0.0011932 | 0.9724707 |
| FEED | 0.0000001 | -0.0038759 | 0.0000152 | 0.9968903 |
The regression model added the BY is shown bellow:
\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + \beta_2 \times BIRTH\_YEAR + \epsilon \]
Where:
The model can be implemented in R using the lm function,
which fits linear models. The BIRTH_YEAR variable is
converted to a factor to account for the categorical nature of birth
years.
# Convert BIRTH_YEAR to a factor and rename
data$BIRTH_YEAR <- as.factor(data$BIRTH_YEAR)
# Initialize a list to store the results
results_list <- list()
# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 4:ncol(data)) {
trait_name <- colnames(data)[i]
# Fit the linear regression model with BIRTH_YEAR as an additional predictor
model <- lm(data[[2]] ~ data[[i]] + BIRTH_YEAR, data = data)
# Summarize the model
model_summary <- summary(model)
# Extract the desired statistics
multiple_r_squared <- model_summary$r.squared
adjusted_r_squared <- model_summary$adj.r.squared
f_statistic <- model_summary$fstatistic[1] # F-statistic value
f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
# Extract the coefficient and its p-value for the trait
coef_summary <- coef(model_summary)
trait_coef <- coef_summary[2, "Estimate"] # Assumes the trait is the second predictor
trait_p_value <- coef_summary[2, "Pr(>|t|)"]
# Combine the statistics into a data frame
result <- data.frame(
Trait = trait_name,
Multiple_R_Squared = multiple_r_squared,
Adjusted_R_Squared = adjusted_r_squared,
F_Statistic = f_statistic,
P_Value = p_value,
Coefficient = trait_coef,
Coefficient_P_Value = trait_p_value
)
# Append the result to the results list
results_list[[i - 2]] <- result
}
# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)
# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv", row.names = FALSE)
Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR
| Trait | Multiple_R_Squared | Adjusted_R_Squared | F_Statistic | P_Value | Coefficient | Coefficient_P_Value |
|---|---|---|---|---|---|---|
| BMR | 0.0881915 | 0.0738886 | 6.165997 | 0.0000945 | 19.1527245 | 0.0028131 |
| CO | 0.0803421 | 0.0659161 | 5.569252 | 0.0002590 | -12.5790675 | 0.0094097 |
| MSL | 0.0734487 | 0.0589145 | 5.053527 | 0.0006186 | -16.2613977 | 0.0277614 |
| IH | 0.0719150 | 0.0573568 | 4.939831 | 0.0007494 | -9.3449051 | 0.0354801 |
| CTFS | 0.0705440 | 0.0559642 | 4.838504 | 0.0008891 | 14.3231800 | 0.0442704 |
| UT | 0.0704128 | 0.0558310 | 4.828827 | 0.0009037 | -14.8606501 | 0.0452229 |
| MS | 0.0686230 | 0.0540132 | 4.697043 | 0.0011284 | -14.8462730 | 0.0606040 |
| CK | 0.0670600 | 0.0524256 | 4.582365 | 0.0013689 | 15.4450943 | 0.0785803 |
| PROT | 0.0658116 | 0.0511577 | 4.491053 | 0.0015963 | 2.1108214 | 0.0970292 |
| DD | 0.0638377 | 0.0491528 | 4.347166 | 0.0020332 | -8.6326827 | 0.1365405 |
| FAT | 0.0628176 | 0.0481167 | 4.273045 | 0.0023028 | 1.2796392 | 0.1637385 |
| SU | 0.0623970 | 0.0476895 | 4.242529 | 0.0024239 | 7.0165690 | 0.1767005 |
| CONF | 0.0621537 | 0.0474424 | 4.224892 | 0.0024968 | -11.6231634 | 0.1847326 |
| MET | 0.0618359 | 0.0471196 | 4.201861 | 0.0025952 | -7.2376432 | 0.1958719 |
| MR | 0.0617517 | 0.0470341 | 4.195767 | 0.0026218 | -7.5588944 | 0.1989504 |
| FOOT | 0.0614269 | 0.0467042 | 4.172251 | 0.0027273 | 31.1509094 | 0.2113774 |
| MOB | 0.0613973 | 0.0466741 | 4.170111 | 0.0027372 | 8.6217376 | 0.2125534 |
| LP | 0.0613481 | 0.0466241 | 4.166549 | 0.0027536 | 8.0864569 | 0.2145285 |
| FTP | 0.0608284 | 0.0460963 | 4.128971 | 0.0029327 | 13.5010333 | 0.2367661 |
| DO | 0.0607904 | 0.0460577 | 4.126220 | 0.0029463 | 8.8960747 | 0.2385010 |
| ME | 0.0607825 | 0.0460496 | 4.125648 | 0.0029491 | -6.2944604 | 0.2388636 |
| UD | 0.0606216 | 0.0458862 | 4.114024 | 0.0030071 | -12.1091142 | 0.2463830 |
| FA | 0.0605549 | 0.0458185 | 4.109209 | 0.0030315 | -8.6648464 | 0.2495819 |
| MT | 0.0601784 | 0.0454361 | 4.082026 | 0.0031728 | -8.2421107 | 0.2686411 |
| CA | 0.0599002 | 0.0451535 | 4.061948 | 0.0032814 | 8.3011337 | 0.2838987 |
| DF | 0.0598369 | 0.0450892 | 4.057381 | 0.0033066 | 7.4854663 | 0.2875213 |
| DHL | 0.0595211 | 0.0447684 | 4.034611 | 0.0034352 | -8.1319072 | 0.3064961 |
| SH | 0.0586677 | 0.0439017 | 3.973163 | 0.0038074 | 4.4345159 | 0.3666760 |
| MILK | 0.0585384 | 0.0437704 | 3.963861 | 0.0038671 | 0.0328321 | 0.3771595 |
| MDR | 0.0584166 | 0.0436466 | 3.955101 | 0.0039243 | 6.0467308 | 0.3874240 |
| ST | 0.0578881 | 0.0431099 | 3.917120 | 0.0041817 | -7.9781463 | 0.4369789 |
| SCS | 0.0576964 | 0.0429152 | 3.903356 | 0.0042790 | -3.9614430 | 0.4573264 |
| AFS | 0.0575918 | 0.0428089 | 3.895845 | 0.0043331 | -5.2064660 | 0.4690656 |
| MSP | 0.0575087 | 0.0427245 | 3.889880 | 0.0043765 | -3.9533312 | 0.4787399 |
| DS | 0.0573283 | 0.0425413 | 3.876936 | 0.0044723 | -4.7841746 | 0.5009026 |
| WL | 0.0572605 | 0.0424724 | 3.872073 | 0.0045088 | 3.4479700 | 0.5096796 |
| TL | 0.0572547 | 0.0424665 | 3.871655 | 0.0045119 | -6.7563254 | 0.5104479 |
| FE | 0.0572054 | 0.0424165 | 3.868123 | 0.0045386 | -3.8209975 | 0.5170084 |
| DA | 0.0566761 | 0.0418789 | 3.830184 | 0.0048356 | 4.3714958 | 0.5986609 |
| HHE | 0.0565353 | 0.0417358 | 3.820095 | 0.0049178 | 2.6941677 | 0.6249126 |
| HL | 0.0563327 | 0.0415301 | 3.805592 | 0.0050384 | -3.1669335 | 0.6676242 |
| LOC | 0.0562600 | 0.0414563 | 3.800388 | 0.0050823 | -3.0195770 | 0.6847865 |
| FL | 0.0562527 | 0.0414489 | 3.799865 | 0.0050868 | 2.8115963 | 0.6865743 |
| SCK | 0.0560192 | 0.0412117 | 3.783153 | 0.0052307 | 1.9379506 | 0.7520160 |
| CW | 0.0559471 | 0.0411384 | 3.777995 | 0.0052759 | -1.9950607 | 0.7767448 |
| FEED | 0.0559443 | 0.0411356 | 3.777797 | 0.0052777 | -1.3559978 | 0.7777588 |
| BQ | 0.0559195 | 0.0411104 | 3.776021 | 0.0052933 | -1.9570851 | 0.7870649 |
| TU | 0.0558834 | 0.0410737 | 3.773438 | 0.0053162 | 1.0731908 | 0.8014578 |
| UF | 0.0558449 | 0.0410347 | 3.770689 | 0.0053406 | 2.2354069 | 0.8181403 |
| DCA | 0.0557115 | 0.0408991 | 3.761148 | 0.0054263 | -1.0069789 | 0.8965483 |
| BD | 0.0557011 | 0.0408885 | 3.760401 | 0.0054331 | 0.8702554 | 0.9055126 |
| BCS | 0.0556786 | 0.0408657 | 3.758795 | 0.0054477 | 0.6193741 | 0.9285710 |
| RUM | 0.0556595 | 0.0408464 | 3.757433 | 0.0054601 | -0.3154634 | 0.9570449 |
| HH | 0.0556561 | 0.0408428 | 3.757185 | 0.0054623 | -0.2539972 | 0.9646162 |
Below we can check the improvement in the percentage of variation in Cortisol explained by the Trait_GEBVs + Birth_Year.
CORR_BY <- read.csv("/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv")
CORR_MIN <- read.csv("/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_all_traits.csv")
CORR_BY <- CORR_BY[, c("Trait", "Adjusted_R_Squared"), drop = FALSE]
CORR_MIN <- CORR_MIN[, c("Trait", "Adjusted_R_Squared"), drop = FALSE]
colnames(CORR_BY)[colnames(CORR_BY) == "Adjusted_R_Squared"] <- "Adjusted_R_Squared_BY"
colnames(CORR_MIN)[colnames(CORR_MIN) == "Adjusted_R_Squared"] <- "Adjusted_R_Squared_MIN"
Corr_R2_comp <- merge(CORR_MIN, CORR_BY, by.x = "Trait", by.y = "Trait")
Corr_R2_comp$Absolute_Change <- Corr_R2_comp$Adjusted_R_Squared_BY - Corr_R2_comp$Adjusted_R_Squared_MIN
write.csv(Corr_R2_comp, "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/absolute_improv_r2.csv")
| X | Trait | Adjusted_R_Squared_Model_MIN | Adjusted_R_Squared_MIN.BY | Absolute_Change |
|---|---|---|---|---|
| 1 | AFS | -0.0013450 | 0.0428089 | 0.0441539 |
| 2 | BCS | -0.0035151 | 0.0408657 | 0.0443809 |
| 3 | BD | -0.0038466 | 0.0408885 | 0.0447351 |
| 4 | BMR | 0.0363266 | 0.0738886 | 0.0375621 |
| 5 | BQ | -0.0037056 | 0.0411104 | 0.0448160 |
| 6 | CA | -0.0002239 | 0.0451535 | 0.0453774 |
| 7 | CK | 0.0067808 | 0.0524256 | 0.0456448 |
| 8 | CO | 0.0243278 | 0.0659161 | 0.0415882 |
| 9 | CONF | 0.0057062 | 0.0474424 | 0.0417362 |
| 10 | CTFS | 0.0147186 | 0.0559642 | 0.0412457 |
| 11 | CW | -0.0032878 | 0.0411384 | 0.0444262 |
| 12 | DA | -0.0031907 | 0.0418789 | 0.0450696 |
| 13 | DCA | -0.0038713 | 0.0408991 | 0.0447705 |
| 14 | DD | 0.0061790 | 0.0491528 | 0.0429738 |
| 15 | DF | 0.0007822 | 0.0450892 | 0.0443070 |
| 16 | DHL | 0.0014340 | 0.0447684 | 0.0433344 |
| 17 | DO | 0.0022267 | 0.0460577 | 0.0438310 |
| 18 | DS | 0.0004528 | 0.0425413 | 0.0420884 |
| 19 | FA | 0.0004168 | 0.0458185 | 0.0454018 |
| 20 | FAT | 0.0007623 | 0.0481167 | 0.0473544 |
| 21 | FE | -0.0009774 | 0.0424165 | 0.0433939 |
| 22 | FEED | -0.0038759 | 0.0411356 | 0.0450115 |
| 23 | FL | -0.0036960 | 0.0414489 | 0.0451449 |
| 24 | FOOT | 0.0004412 | 0.0467042 | 0.0462629 |
| 25 | FTP | -0.0010778 | 0.0460963 | 0.0471741 |
| 26 | HH | -0.0038249 | 0.0408428 | 0.0446677 |
| 27 | HHE | -0.0038236 | 0.0417358 | 0.0455595 |
| 28 | HL | -0.0025503 | 0.0415301 | 0.0440805 |
| 29 | IH | 0.0062855 | 0.0573568 | 0.0510713 |
| 30 | LOC | -0.0030185 | 0.0414563 | 0.0444747 |
| 31 | LP | 0.0035770 | 0.0466241 | 0.0430471 |
| 32 | MDR | -0.0000516 | 0.0436466 | 0.0436982 |
| 33 | ME | -0.0004452 | 0.0460496 | 0.0464948 |
| 34 | MET | 0.0032156 | 0.0471196 | 0.0439039 |
| 35 | MILK | -0.0038237 | 0.0437704 | 0.0475941 |
| 36 | MOB | 0.0008792 | 0.0466741 | 0.0457949 |
| 37 | MR | 0.0028254 | 0.0470341 | 0.0442086 |
| 38 | MS | 0.0090298 | 0.0540132 | 0.0449834 |
| 39 | MSL | 0.0160455 | 0.0589145 | 0.0428690 |
| 40 | MSP | -0.0023812 | 0.0427245 | 0.0451057 |
| 41 | MT | 0.0037810 | 0.0454361 | 0.0416552 |
| 42 | PROT | 0.0001421 | 0.0511577 | 0.0510155 |
| 43 | RUM | -0.0035598 | 0.0408464 | 0.0444062 |
| 44 | SCK | -0.0027305 | 0.0412117 | 0.0439421 |
| 45 | SCS | -0.0007321 | 0.0429152 | 0.0436472 |
| 46 | SH | 0.0004643 | 0.0439017 | 0.0434374 |
| 47 | ST | -0.0020640 | 0.0431099 | 0.0451738 |
| 48 | SU | 0.0039131 | 0.0476895 | 0.0437764 |
| 49 | TL | -0.0029717 | 0.0424665 | 0.0454382 |
| 50 | TU | -0.0036547 | 0.0410737 | 0.0447284 |
| 51 | UD | 0.0046866 | 0.0458862 | 0.0411996 |
| 52 | UF | -0.0020001 | 0.0410347 | 0.0430348 |
| 53 | UT | 0.0128310 | 0.0558310 | 0.0430000 |
| 54 | WL | -0.0000350 | 0.0424724 | 0.0425074 |
Adjusted_R_Squared_Model_MIN = Model without Birth
Year
Adjusted_R_Squared_MIN.BY = Model adding Birth Year